change illegal xml character stripping from QRegExp/QRegularExpression to QTextCodec...
author tsteven4 <tsteven4@f51c46e8-681c-474f-0cfe-069cfd0219fb>
Tue, 17 Sep 2013 01:12:47 +0000 (01:12 +0000)
committer tsteven4 <tsteven4@f51c46e8-681c-474f-0cfe-069cfd0219fb>
Tue, 17 Sep 2013 01:12:47 +0000 (01:12 +0000)
gpsbabel/gpx.cc
gpsbabel/kml.cc
gpsbabel/src/core/xmlstreamwriter.cc
gpsbabel/src/core/xmlstreamwriter.h

index 35fbfa90b2349f6d10242d5e8aa8766fe42923c2..b451f1786414021e8af0fd6e661741304955eddb 100644 (file)
@@ -1257,7 +1257,6 @@ gpx_wr_init(const char* fname)
 
   writer = new gpsbabel::XmlStreamWriter(oqfile);
   writer->setAutoFormattingIndent(2);
-  writer->setCodec("UTF-8");
   writer->writeStartDocument();
 }
 
index e01a53403eeb13121eb33a41a86667ec79e06366..caa51b9dcf1ee59dfbdb4f1608e9b08cf5c2ba6f 100644 (file)
@@ -427,7 +427,6 @@ kml_wr_init(const char* fname)
 
   writer = new gpsbabel::XmlStreamWriter(oqfile);
   writer->setAutoFormattingIndent(2);
-  writer->setCodec("UTF-8");
 }
 
 /*
@@ -446,7 +445,6 @@ kml_wr_position_init(const char* fname)
    * matters in this mode, turn the pretty formatting off.
    */
   writer->setAutoFormatting(false);
-  writer->setCodec("UTF-8");
 
   max_position_points = atoi(opt_max_position_points);
 }
index abfb230d7fae410560a8b43988e017305c883cc7..d47e341cd84fef96fe92400ab5ace5dee9d8f488 100644 (file)
  */
 
 #include <src/core/xmlstreamwriter.h>
-#include <QtCore/QtGlobal>
 
 #include <QtCore/QFile>
-#if (QT_VERSION < QT_VERSION_CHECK(5, 0, 0))
-#include <QtCore/QRegExp>
-#else
-#include <QtCore/QRegularExpression>
-#endif
+#include <QtCore/QTextCodec>
 #include <QtCore/QXmlStreamWriter>
 
 // As this code began in C, we have several hundred places that write
 namespace gpsbabel
 {
 
-XmlStreamWriter:: XmlStreamWriter(QString* s) : QXmlStreamWriter(s) {}
+XmlTextCodec* XmlTextCodec::instance = new XmlTextCodec();
 
-XmlStreamWriter::XmlStreamWriter(QFile* f) : QXmlStreamWriter(f) {}
-
-#if (QT_VERSION < QT_VERSION_CHECK(5, 0, 0))
-QRegExp XmlStreamWriter::badXml10 = QRegExp("[\\x0000-\\x0008]|[\\x000b-\\x000c]|[\\x000e-\\x001f]");
-#else
-QRegularExpression XmlStreamWriter::badXml10 = QRegularExpression("[\\x00-\\x08]|[\\x0b-\\x0c]|[\\x0e-\\x1f]");
-#endif
+XmlTextCodec::XmlTextCodec() : QTextCodec()
+{
+  utf8Codec = QTextCodec::codecForName("UTF-8");
+}
 
-// Dont emit the attribute if there's nothing interesting in it.
-void XmlStreamWriter::writeOptionalAttribute(const QString& qualifiedName, QString value)
+QByteArray XmlTextCodec::convertFromUnicode(const QChar* chars, int len, QTextCodec::ConverterState* state) const
 {
-  if (!value.isEmpty()) {
-    QXmlStreamWriter::writeAttribute(qualifiedName, value.replace(badXml10, " "));
+// Qt 4.7.4, 4.6.2 don't have IgnoreHeader set on the first call, which can
+// result in a BOM being output by utf8Codec.
+  state->flags |= QTextCodec::IgnoreHeader;
+  QByteArray r = utf8Codec->fromUnicode(chars, len, state);
+  char* data = r.data();
+  for (int i = 0; i < r.size(); i++) {
+    if ((0x00 <= data[i] && data[i] <= 0x08) ||
+        (0x0b <= data[i] && data[i] <= 0x0c) ||
+        (0x0e <= data[i] && data[i] <= 0x1f)) {
+      data[i] = ' ';
+    }
   }
+  return r;
 }
 
-// Dont emit the element if there's nothing interesting in it.
-void XmlStreamWriter::writeOptionalTextElement(const QString& qualifiedName, QString text)
+QString XmlTextCodec::convertToUnicode(const char* chars, int len, QTextCodec::ConverterState* state) const
 {
-  if (!text.isEmpty()) {
-    QXmlStreamWriter::writeTextElement(qualifiedName, text.replace(badXml10, " "));
-  }
+  return utf8Codec->toUnicode(chars, len, state);
+}
+
+int XmlTextCodec::mibEnum() const
+{
+  return UTF8_FOR_XML_MIB;
 }
 
-void XmlStreamWriter::writeAttribute(const QString& qualifiedName, QString value)
+// Our name must not overlap with UTF-8 or it may be returned by QTextCodec::codecForName("UTF-8")
+QByteArray XmlTextCodec::name() const
 {
-  QXmlStreamWriter::writeAttribute(qualifiedName, value.replace(badXml10, " "));
+  return QByteArray("UTF-8-XML");
 }
 
-void XmlStreamWriter::writeCDATA(QString text)
+XmlStreamWriter::XmlStreamWriter(QString* string) : QXmlStreamWriter(string)
 {
-  QXmlStreamWriter::writeCDATA(text.replace(badXml10, " "));
 }
 
-void XmlStreamWriter::writeCharacters(QString text)
+XmlStreamWriter::XmlStreamWriter(QFile* f) : QXmlStreamWriter(f)
 {
-  QXmlStreamWriter::writeCharacters(text.replace(badXml10, " "));
+  setCodec(XmlTextCodec::instance);
 }
 
-void XmlStreamWriter::writeTextElement(const QString& qualifiedName, QString value)
+// We must override the encoding; we don't want to use XmlTextCodec::name().
+void XmlStreamWriter::writeStartDocument()
 {
-  QXmlStreamWriter::writeTextElement(qualifiedName, value.replace(badXml10, " "));
+  writeProcessingInstruction("xml version=\"1.0\" encoding=\"UTF-8\"");
+}
+
+// Don't emit the attribute if there's nothing interesting in it.
+void XmlStreamWriter::writeOptionalAttribute(const QString& qualifiedName, const QString& value)
+{
+  if (!value.isEmpty()) {
+    QXmlStreamWriter::writeAttribute(qualifiedName, value);
+  }
+}
+
+// Don't emit the element if there's nothing interesting in it.
+void XmlStreamWriter::writeOptionalTextElement(const QString& qualifiedName, const QString& text)
+{
+  if (!text.isEmpty()) {
+    QXmlStreamWriter::writeTextElement(qualifiedName, text);
+  }
 }
 
 } // namespace gpsbabel
index 5a15f3a5eb12f7a45e484264d44b661acd83b45c..4dc27fb19bd2aee6aecc294d15ffec103ebf42d5 100644 (file)
 #ifndef XMLSTREAMWRITER_H
 #define XMLSTREAMWRITER_H
 
-#include <QtCore/QtGlobal>
+#include <QtCore/QTextCodec>
 #include <QtCore/QXmlStreamWriter>
 
 class QFile;
-#if (QT_VERSION < QT_VERSION_CHECK(5, 0, 0))
-class QRegExp;
-#else
-class QRegularExpression;
-#endif
 
 namespace gpsbabel
 {
 
-class XmlStreamWriter : public QXmlStreamWriter
+// From the "vendor" range, see:
+// https://www.iana.org/assignments/character-sets/character-sets.xhtml
+const int UTF8_FOR_XML_MIB = 2000;
+
+class XmlTextCodec : public QTextCodec
 {
 private:
-#if (QT_VERSION < QT_VERSION_CHECK(5, 0, 0))
-  static QRegExp badXml10;
-#else
-  static QRegularExpression badXml10;
-#endif
+  QTextCodec* utf8Codec;
+public:
+  XmlTextCodec();
+  static XmlTextCodec *instance;
+  virtual QByteArray name() const;
+  virtual int mibEnum() const;
+protected:
+  virtual QByteArray convertFromUnicode(const QChar* chars, int len, QTextCodec::ConverterState* state) const;
+  virtual QString convertToUnicode(const char* chars, int len, QTextCodec::ConverterState* state) const;
+};
 
+class XmlStreamWriter : public QXmlStreamWriter
+{
 public:
-  XmlStreamWriter(QString* s);
+  XmlStreamWriter(QString* string);
   XmlStreamWriter(QFile* f);
 
-  void writeOptionalAttribute(const QString& qualifiedName, QString value);
-  void writeOptionalTextElement(const QString& qualifiedName, QString text);
-  void writeAttribute(const QString& qualifiedName, QString value);
-  void writeCDATA(QString text);
-  void writeCharacters(QString text);
-  void writeTextElement(const QString& qualifiedName, QString value);
-
+  void writeStartDocument(void);
+  void writeOptionalAttribute(const QString& qualifiedName, const QString& value);
+  void writeOptionalTextElement(const QString& qualifiedName, const QString& text);
 };
 
 } // namespace gpsbabel